In [293]:
# Import Libraries
In [424]:
import os
import logging
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Conv2D, MaxPooling2D, 
    Embedding, Dropout
)
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.svm import SVC
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.feature_selection import RFE
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from imblearn.over_sampling import SMOTE
from scipy.stats import zscore
from collections import Counter
from joblib import Parallel, delayed
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
In [425]:
# Dataset paths

train_path = "Dataset-HAR-Monitoring/train.csv"
test_path = "Dataset-HAR-Monitoring/test.csv"
In [426]:
# Load Data
In [427]:
def load_data(train_path, test_path):
    """Load train and test datasets."""
    
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    return train, test
In [428]:
# Define target_names
target_names = ['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS', 'WALKING_UPSTAIRS']
In [429]:
# EDA Check
In [430]:
def EDA_check(train, test):
    """Perform exploratory data analysis."""
    print("\n--- Performing EDA ---")
    data_overview(train, test)
    basic_statistics(train)
    class_distribution(train)
    missing_data(train)
    feature_relationships(train)
    feature_distributions(train)
In [431]:
# Sub Methods for EDA
In [432]:
# Data Overview

def data_overview(train, test):
    """Display basic information about the datasets."""
    print("\n--- TRAIN DATA OVERVIEW ---")
    print(train.info())
    print(train.head())
    print("Shape:", train.shape)

    print("\n--- TEST DATA OVERVIEW ---")
    print(test.info())
    print(test.head())
    print("Shape:", test.shape)
In [433]:
# Basic stats

def basic_statistics(train):
    """Display basic statistics and column information."""
    print("\n--- BASIC STATISTICS ---")
    print(train.describe())
    print("\nUnique values per column:")
    print(train.nunique())

    cat_cols = train.select_dtypes(include=['object']).columns
    num_cols = train.select_dtypes(include=['float64', 'int64']).columns
    print("\nCategorical Columns:", cat_cols)
    print("\nNumerical Columns:", num_cols)
In [434]:
# Class Distribution

def class_distribution(train):
    """Plot the distribution of target classes."""
    plt.figure(figsize=(8, 6))
    sns.countplot(x='Activity', data=train)
    plt.title('Class Distribution')
    plt.xticks(rotation=45)
    plt.show()
In [435]:
# Identify missing data 

def missing_data(train):
    """Check for missing values and duplicates."""
    print("\n--- MISSING DATA ---")
    missing = train.isnull().sum()
    print(missing[missing > 0])

    print("\n--- DUPLICATES ---")
    duplicates = train.duplicated().sum()
    print(f"Number of duplicate rows: {duplicates}")
In [436]:
# Feature Correlation Matrix

def feature_relationships(train):
    """Plot feature correlation matrix."""
    numeric_train = train.select_dtypes(include=['float64', 'int64'])
    correlation_matrix = numeric_train.corr()
    plt.figure(figsize=(12, 8))
    sns.heatmap(correlation_matrix, cmap='coolwarm', annot=True, linewidths=0.1)
    plt.title('Feature Correlation Matrix')
    plt.show()
In [437]:
# Feature Distribution

def feature_distributions(train):
    """Plot feature distributions."""
    num_features = train.shape[1]
    features_per_plot = 100  # Number of features per subplot
    num_plots = int(np.ceil(num_features / features_per_plot))

    # Plot histograms in batches
    for i in range(num_plots):
        start_col = i * features_per_plot
        end_col = min((i + 1) * features_per_plot, num_features)

        train.iloc[:, start_col:end_col].hist(figsize=(16, 12), bins=30)
        plt.suptitle(f'Feature Distribution (Columns {start_col+1} to {end_col})')
        plt.show()

    # Plot boxplots in smaller groups
    boxplot_features = 100  # Number of features per boxplot
    num_boxplots = int(np.ceil(num_features / boxplot_features))

    for i in range(num_boxplots):
        start_col = i * boxplot_features
        end_col = min((i + 1) * boxplot_features, num_features)

        plt.figure(figsize=(12, 6))
        sns.boxplot(data=train.iloc[:, start_col:end_col])
        plt.title(f'Boxplot of Features {start_col+1} to {end_col}')
        plt.xticks(rotation=90)
        plt.show()
In [438]:
# Data Preprocessing
In [439]:
from sklearn.preprocessing import OneHotEncoder

def data_preprocessing(train, test):
    """Preprocess data: scaling, PCA, and feature selection."""
    print("\n--- Preprocessing Data ---")
    X_train, X_test = remove_highly_correlated_features(train, test)
    X_train, X_test, vif_data = remove_high_vif_features(X_train, X_test)

    # Separate features and target
    X_train_cleaned = X_train.drop(columns=['Activity'], errors='ignore')
    X_test_cleaned = X_test.drop(columns=['Activity'], errors='ignore')

    # Encode the 'subject' column using OneHotEncoding
    subject_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    X_train_subject_encoded = subject_encoder.fit_transform(X_train[['subject']])
    X_test_subject_encoded = subject_encoder.transform(X_test[['subject']])

    # Drop the original 'subject' column
    X_train_cleaned = X_train_cleaned.drop(columns=['subject'], errors='ignore')
    X_test_cleaned = X_test_cleaned.drop(columns=['subject'], errors='ignore')

    # Apply scaling and PCA
    X_train_scaled, X_test_scaled, scaler, pca = preprocess_data(X_train_cleaned, X_test_cleaned)

    # Combine scaled features with encoded subject data
    X_train_final = np.hstack((X_train_scaled, X_train_subject_encoded))
    X_test_final = np.hstack((X_test_scaled, X_test_subject_encoded))

    # Encode target variable
    activity_encoder = LabelEncoder()
    y_train = activity_encoder.fit_transform(X_train['Activity'])
    y_test = activity_encoder.transform(X_test['Activity'])

    # Check class imbalance before SMOTE
    check_class_imbalance(y_train, activity_encoder)

    # Handle class imbalance
    X_resampled, y_resampled = handle_class_imbalance(X_train_final, y_train)

    return X_resampled, y_resampled, X_test_final, y_test, scaler, pca, activity_encoder, subject_encoder
In [440]:
# Remove correletaed Features
In [441]:
def remove_highly_correlated_features(train, test, threshold=0.97):
    """Remove features with correlation above the threshold."""
    activity_train = train['Activity'] if 'Activity' in train.columns else None
    subject_train = train['subject'] if 'subject' in train.columns else None
    activity_test = test['Activity'] if 'Activity' in test.columns else None
    subject_test = test['subject'] if 'subject' in test.columns else None

    numeric_train = train.select_dtypes(include=['float64', 'int64'])
    X_train = numeric_train.drop(columns=['subject', 'Activity'], errors='ignore')

    corr_matrix = X_train.corr().abs()
    upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > threshold)]
    print(f"Removing {len(to_drop)} highly correlated features: {to_drop}")

    X_train = X_train.drop(columns=to_drop)
    X_test = test.drop(columns=to_drop, errors='ignore')

    if activity_train is not None:
        X_train['Activity'] = activity_train
    if subject_train is not None:
        X_train['subject'] = subject_train
    if activity_test is not None:
        X_test['Activity'] = activity_test
    if subject_test is not None:
        X_test['subject'] = subject_test

    return X_train, X_test
In [442]:
# Remove High VIF Features

def remove_high_vif_features(train, test, threshold=10):
    """Remove features with high VIF."""
    activity_train = train['Activity'] if 'Activity' in train.columns else None
    subject_train = train['subject'] if 'subject' in train.columns else None
    activity_test = test['Activity'] if 'Activity' in test.columns else None
    subject_test = test['subject'] if 'subject' in test.columns else None

    numeric_train = train.select_dtypes(include=['float64', 'int64'])
    X = numeric_train.drop(columns=['subject', 'Activity'], errors='ignore').copy()

    vif_values = svd_vif(X)
    vif_data = pd.DataFrame({"Feature": X.columns, "VIF": vif_values})
    vif_data = vif_data.sort_values(by="VIF", ascending=False)

    high_vif_features = vif_data[vif_data['VIF'] > threshold]['Feature'].tolist()
    print(f"Dropping {len(high_vif_features)} features with VIF > {threshold}: {high_vif_features}")

    train_reduced = train.drop(columns=high_vif_features, errors='ignore')
    test_reduced = test.drop(columns=high_vif_features, errors='ignore')

    return train_reduced, test_reduced, vif_data


def svd_vif(X):
    """Compute VIF using Singular Value Decomposition."""
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    vif_values = 1 / (s ** 2)
    return vif_values
In [443]:
# Preprocessor Data , scaling, PCA, and feature selection
In [444]:
def preprocess_data(X_train, X_test, variance_threshold=0.97):
    # Normalize features
    scaler = MinMaxScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Apply PCA
    pca = PCA(n_components=variance_threshold)  # Retain 97% variance
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    # Use original feature names for PCA components
    loadings = pd.DataFrame(pca.components_, columns=X_train.columns)

    # Create descriptive names for each principal component using all contributing features
    feature_names = ["_".join(loadings.iloc[i].index.tolist()) for i in range(loadings.shape[0])]

    # Assign human-readable column names
    X_train_pca = pd.DataFrame(X_train_pca, columns=feature_names)
    X_test_pca = pd.DataFrame(X_test_pca, columns=feature_names)

    return X_train_pca, X_test_pca, scaler, pca
In [445]:
# Check class imbalance
In [446]:
def check_class_imbalance(y_train, encoder):
    """Checks and visualizes class imbalance with actual activity names."""
    label_counts = pd.Series(y_train).value_counts()

    # Convert numeric labels back to original activity names
    activity_labels = encoder.inverse_transform(label_counts.index)

    # Define colors for the pie chart
    colors = ['lightblue', 'lightgreen', 'lightcoral', 'gold', 'pink', 'purple']

    # Plot class distribution with activity names
    plt.figure(figsize=(6, 6))
    plt.pie(label_counts, labels=activity_labels, autopct='%1.1f%%', startangle=140, colors=colors)
    plt.title("Distribution of Activity Classes")
    plt.show()

    return label_counts
In [447]:
# Handle class imbalance
In [448]:
def handle_class_imbalance(X_train, y_train):
    """Apply SMOTE to handle class imbalance."""
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    return X_resampled, y_resampled
In [449]:
def tune_hyperparameters(X_train, y_train):
    #Perform hyperparameter tuning using GridSearchCV.

    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }
    grid_search = GridSearchCV(
        RandomForestClassifier(random_state=42),
        param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1
    )
    grid_search.fit(X_train, y_train)
    print(f"Best hyperparameters: {grid_search.best_params_}")
    return grid_search.best_estimator_
In [467]:
def randomforest_model_training_and_evaluation(algorithm_name, X_train, y_train, X_test, y_test):

#  Train and evaluate a Random Forest model.
# model (RandomForestClassifier): Trained Random Forest model.

    print(f"\n--- Training and Evaluating {algorithm_name} Model ---")

    # Hyperparameter tuning
    best_model = tune_hyperparameters(X_train, y_train)

    # Initialize the model with the best hyperparameters
    model = RandomForestClassifier(
        n_estimators=best_model.n_estimators,
        max_depth=best_model.max_depth,
        min_samples_split=best_model.min_samples_split, random_state=42, class_weight="balanced"
    )
    print(f"{algorithm_name} model initialized with tuned hyperparameters.")

    # Perform cross-validation
    logging.info("Performing cross-validation...")
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='accuracy')
    print(f"Cross-validation accuracy scores: {cv_scores}")
    print(f"Mean cross-validation accuracy: {cv_scores.mean():.4f}")

    # Initialize the model
    print(f"{algorithm_name} model trained on the full training set.")

    # Train the model
    model.fit(X_train, y_train)
    print("Model trained.")

    # Make predictions
    y_pred_random_forest = model.predict(X_test)
    print("Predictions made on the test set.")

    # Evaluate model performance
    evaluate_model(algorithm_name, y_test, y_pred)
    print("Model evaluated.")
    return model,y_pred_random_forest
In [468]:
def plot_confusion_matrix(cm, y_test):
  #  Plot and save the confusion matrix.

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=np.unique(y_test), yticklabels=np.unique(y_test))
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()
In [469]:
def evaluate_model(algorithm_name, y_test, y_pred):

    # Evaluate the model's performance on the test set.

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    report = classification_report(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    # Print evaluation metrics
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test F1-score: {f1:.4f}")
    print(f"Test Precision: {precision:.4f}")
    print(f"Test Recall: {recall:.4f}")
    print("\nClassification Report:\n" + report)

    # Plot confusion matrix
    plot_confusion_matrix(cm, y_test)
    return accuracy, report, cm
In [453]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [454]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [455]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [456]:
# DATA LOADING

# EDA Check

# Data Preprocessing
In [457]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [458]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [459]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [460]:
# Load data
train, test = load_data(train_path, test_path)

# Perform EDA
EDA_check(train, test)

# Preprocess data
X_train, y_train, X_test, y_test, scaler, pca, activity_encoder, subject_encoder = data_preprocessing(train, test)


#X_train_final, y_train, X_test_final, y_test, scaler, pca, activity_encoder, subject_encoder = data_preprocessing(train, test)
--- Performing EDA ---

--- TRAIN DATA OVERVIEW ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7352 entries, 0 to 7351
Columns: 563 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), int64(1), object(1)
memory usage: 31.6+ MB
None
   tBodyAcc-mean()-X  tBodyAcc-mean()-Y  tBodyAcc-mean()-Z  tBodyAcc-std()-X  \
0           0.288585          -0.020294          -0.132905         -0.995279   
1           0.278419          -0.016411          -0.123520         -0.998245   
2           0.279653          -0.019467          -0.113462         -0.995380   
3           0.279174          -0.026201          -0.123283         -0.996091   
4           0.276629          -0.016570          -0.115362         -0.998139   

   tBodyAcc-std()-Y  tBodyAcc-std()-Z  tBodyAcc-mad()-X  tBodyAcc-mad()-Y  \
0         -0.983111         -0.913526         -0.995112         -0.983185   
1         -0.975300         -0.960322         -0.998807         -0.974914   
2         -0.967187         -0.978944         -0.996520         -0.963668   
3         -0.983403         -0.990675         -0.997099         -0.982750   
4         -0.980817         -0.990482         -0.998321         -0.979672   

   tBodyAcc-mad()-Z  tBodyAcc-max()-X  ...  fBodyBodyGyroJerkMag-kurtosis()  \
0         -0.923527         -0.934724  ...                        -0.710304   
1         -0.957686         -0.943068  ...                        -0.861499   
2         -0.977469         -0.938692  ...                        -0.760104   
3         -0.989302         -0.938692  ...                        -0.482845   
4         -0.990441         -0.942469  ...                        -0.699205   

   angle(tBodyAccMean,gravity)  angle(tBodyAccJerkMean),gravityMean)  \
0                    -0.112754                              0.030400   
1                     0.053477                             -0.007435   
2                    -0.118559                              0.177899   
3                    -0.036788                             -0.012892   
4                     0.123320                              0.122542   

   angle(tBodyGyroMean,gravityMean)  angle(tBodyGyroJerkMean,gravityMean)  \
0                         -0.464761                             -0.018446   
1                         -0.732626                              0.703511   
2                          0.100699                              0.808529   
3                          0.640011                             -0.485366   
4                          0.693578                             -0.615971   

   angle(X,gravityMean)  angle(Y,gravityMean)  angle(Z,gravityMean)  subject  \
0             -0.841247              0.179941             -0.058627        1   
1             -0.844788              0.180289             -0.054317        1   
2             -0.848933              0.180637             -0.049118        1   
3             -0.848649              0.181935             -0.047663        1   
4             -0.847865              0.185151             -0.043892        1   

   Activity  
0  STANDING  
1  STANDING  
2  STANDING  
3  STANDING  
4  STANDING  

[5 rows x 563 columns]
Shape: (7352, 563)

--- TEST DATA OVERVIEW ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2947 entries, 0 to 2946
Columns: 563 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), int64(1), object(1)
memory usage: 12.7+ MB
None
   tBodyAcc-mean()-X  tBodyAcc-mean()-Y  tBodyAcc-mean()-Z  tBodyAcc-std()-X  \
0           0.257178          -0.023285          -0.014654         -0.938404   
1           0.286027          -0.013163          -0.119083         -0.975415   
2           0.275485          -0.026050          -0.118152         -0.993819   
3           0.270298          -0.032614          -0.117520         -0.994743   
4           0.274833          -0.027848          -0.129527         -0.993852   

   tBodyAcc-std()-Y  tBodyAcc-std()-Z  tBodyAcc-mad()-X  tBodyAcc-mad()-Y  \
0         -0.920091         -0.667683         -0.952501         -0.925249   
1         -0.967458         -0.944958         -0.986799         -0.968401   
2         -0.969926         -0.962748         -0.994403         -0.970735   
3         -0.973268         -0.967091         -0.995274         -0.974471   
4         -0.967445         -0.978295         -0.994111         -0.965953   

   tBodyAcc-mad()-Z  tBodyAcc-max()-X  ...  fBodyBodyGyroJerkMag-kurtosis()  \
0         -0.674302         -0.894088  ...                        -0.705974   
1         -0.945823         -0.894088  ...                        -0.594944   
2         -0.963483         -0.939260  ...                        -0.640736   
3         -0.968897         -0.938610  ...                        -0.736124   
4         -0.977346         -0.938610  ...                        -0.846595   

   angle(tBodyAccMean,gravity)  angle(tBodyAccJerkMean),gravityMean)  \
0                     0.006462                              0.162920   
1                    -0.083495                              0.017500   
2                    -0.034956                              0.202302   
3                    -0.017067                              0.154438   
4                    -0.002223                             -0.040046   

   angle(tBodyGyroMean,gravityMean)  angle(tBodyGyroJerkMean,gravityMean)  \
0                         -0.825886                              0.271151   
1                         -0.434375                              0.920593   
2                          0.064103                              0.145068   
3                          0.340134                              0.296407   
4                          0.736715                             -0.118545   

   angle(X,gravityMean)  angle(Y,gravityMean)  angle(Z,gravityMean)  subject  \
0             -0.720009              0.276801             -0.057978        2   
1             -0.698091              0.281343             -0.083898        2   
2             -0.702771              0.280083             -0.079346        2   
3             -0.698954              0.284114             -0.077108        2   
4             -0.692245              0.290722             -0.073857        2   

   Activity  
0  STANDING  
1  STANDING  
2  STANDING  
3  STANDING  
4  STANDING  

[5 rows x 563 columns]
Shape: (2947, 563)

--- BASIC STATISTICS ---
       tBodyAcc-mean()-X  tBodyAcc-mean()-Y  tBodyAcc-mean()-Z  \
count        7352.000000        7352.000000        7352.000000   
mean            0.274488          -0.017695          -0.109141   
std             0.070261           0.040811           0.056635   
min            -1.000000          -1.000000          -1.000000   
25%             0.262975          -0.024863          -0.120993   
50%             0.277193          -0.017219          -0.108676   
75%             0.288461          -0.010783          -0.097794   
max             1.000000           1.000000           1.000000   

       tBodyAcc-std()-X  tBodyAcc-std()-Y  tBodyAcc-std()-Z  tBodyAcc-mad()-X  \
count       7352.000000       7352.000000       7352.000000       7352.000000   
mean          -0.605438         -0.510938         -0.604754         -0.630512   
std            0.448734          0.502645          0.418687          0.424073   
min           -1.000000         -0.999873         -1.000000         -1.000000   
25%           -0.992754         -0.978129         -0.980233         -0.993591   
50%           -0.946196         -0.851897         -0.859365         -0.950709   
75%           -0.242813         -0.034231         -0.262415         -0.292680   
max            1.000000          0.916238          1.000000          1.000000   

       tBodyAcc-mad()-Y  tBodyAcc-mad()-Z  tBodyAcc-max()-X  ...  \
count       7352.000000       7352.000000       7352.000000  ...   
mean          -0.526907         -0.606150         -0.468604  ...   
std            0.485942          0.414122          0.544547  ...   
min           -1.000000         -1.000000         -1.000000  ...   
25%           -0.978162         -0.980251         -0.936219  ...   
50%           -0.857328         -0.857143         -0.881637  ...   
75%           -0.066701         -0.265671         -0.017129  ...   
max            0.967664          1.000000          1.000000  ...   

       fBodyBodyGyroJerkMag-skewness()  fBodyBodyGyroJerkMag-kurtosis()  \
count                      7352.000000                      7352.000000   
mean                         -0.307009                        -0.625294   
std                           0.321011                         0.307584   
min                          -0.995357                        -0.999765   
25%                          -0.542602                        -0.845573   
50%                          -0.343685                        -0.711692   
75%                          -0.126979                        -0.503878   
max                           0.989538                         0.956845   

       angle(tBodyAccMean,gravity)  angle(tBodyAccJerkMean),gravityMean)  \
count                  7352.000000                           7352.000000   
mean                      0.008684                              0.002186   
std                       0.336787                              0.448306   
min                      -0.976580                             -1.000000   
25%                      -0.121527                             -0.289549   
50%                       0.009509                              0.008943   
75%                       0.150865                              0.292861   
max                       1.000000                              1.000000   

       angle(tBodyGyroMean,gravityMean)  angle(tBodyGyroJerkMean,gravityMean)  \
count                       7352.000000                           7352.000000   
mean                           0.008726                             -0.005981   
std                            0.608303                              0.477975   
min                           -1.000000                             -1.000000   
25%                           -0.482273                             -0.376341   
50%                            0.008735                             -0.000368   
75%                            0.506187                              0.359368   
max                            0.998702                              0.996078   

       angle(X,gravityMean)  angle(Y,gravityMean)  angle(Z,gravityMean)  \
count           7352.000000           7352.000000           7352.000000   
mean              -0.489547              0.058593             -0.056515   
std                0.511807              0.297480              0.279122   
min               -1.000000             -1.000000             -1.000000   
25%               -0.812065             -0.017885             -0.143414   
50%               -0.709417              0.182071              0.003181   
75%               -0.509079              0.248353              0.107659   
max                1.000000              0.478157              1.000000   

           subject  
count  7352.000000  
mean     17.413085  
std       8.975143  
min       1.000000  
25%       8.000000  
50%      19.000000  
75%      26.000000  
max      30.000000  

[8 rows x 562 columns]

Unique values per column:
tBodyAcc-mean()-X       7347
tBodyAcc-mean()-Y       7352
tBodyAcc-mean()-Z       7349
tBodyAcc-std()-X        7349
tBodyAcc-std()-Y        7351
                        ... 
angle(X,gravityMean)    7352
angle(Y,gravityMean)    7352
angle(Z,gravityMean)    7352
subject                   21
Activity                   6
Length: 563, dtype: int64

Categorical Columns: Index(['Activity'], dtype='object')

Numerical Columns: Index(['tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z',
       'tBodyAcc-std()-X', 'tBodyAcc-std()-Y', 'tBodyAcc-std()-Z',
       'tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y', 'tBodyAcc-mad()-Z',
       'tBodyAcc-max()-X',
       ...
       'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-kurtosis()',
       'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)',
       'angle(tBodyGyroMean,gravityMean)',
       'angle(tBodyGyroJerkMean,gravityMean)', 'angle(X,gravityMean)',
       'angle(Y,gravityMean)', 'angle(Z,gravityMean)', 'subject'],
      dtype='object', length=562)
No description has been provided for this image
--- MISSING DATA ---
Series([], dtype: int64)

--- DUPLICATES ---
Number of duplicate rows: 0
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
--- Preprocessing Data ---
Removing 256 highly correlated features: ['tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y', 'tBodyAcc-mad()-Z', 'tBodyAcc-max()-X', 'tBodyAcc-sma()', 'tBodyAcc-iqr()-X', 'tBodyAcc-iqr()-Y', 'tBodyAcc-iqr()-Z', 'tGravityAcc-mad()-X', 'tGravityAcc-mad()-Y', 'tGravityAcc-mad()-Z', 'tGravityAcc-max()-X', 'tGravityAcc-max()-Y', 'tGravityAcc-max()-Z', 'tGravityAcc-min()-X', 'tGravityAcc-min()-Y', 'tGravityAcc-min()-Z', 'tGravityAcc-energy()-X', 'tGravityAcc-iqr()-X', 'tGravityAcc-iqr()-Y', 'tGravityAcc-iqr()-Z', 'tGravityAcc-arCoeff()-X,2', 'tGravityAcc-arCoeff()-X,3', 'tGravityAcc-arCoeff()-X,4', 'tGravityAcc-arCoeff()-Y,2', 'tGravityAcc-arCoeff()-Y,3', 'tGravityAcc-arCoeff()-Y,4', 'tGravityAcc-arCoeff()-Z,2', 'tGravityAcc-arCoeff()-Z,3', 'tGravityAcc-arCoeff()-Z,4', 'tBodyAccJerk-std()-X', 'tBodyAccJerk-mad()-X', 'tBodyAccJerk-mad()-Y', 'tBodyAccJerk-mad()-Z', 'tBodyAccJerk-sma()', 'tBodyAccJerk-iqr()-X', 'tBodyAccJerk-iqr()-Y', 'tBodyAccJerk-iqr()-Z', 'tBodyAccJerk-entropy()-Y', 'tBodyAccJerk-entropy()-Z', 'tBodyGyro-mad()-X', 'tBodyGyro-mad()-Y', 'tBodyGyro-mad()-Z', 'tBodyGyro-iqr()-X', 'tBodyGyro-iqr()-Y', 'tBodyGyro-iqr()-Z', 'tBodyGyroJerk-mad()-X', 'tBodyGyroJerk-mad()-Y', 'tBodyGyroJerk-mad()-Z', 'tBodyGyroJerk-max()-X', 'tBodyGyroJerk-max()-Z', 'tBodyGyroJerk-sma()', 'tBodyGyroJerk-iqr()-X', 'tBodyGyroJerk-iqr()-Y', 'tBodyGyroJerk-iqr()-Z', 'tBodyGyroJerk-entropy()-Z', 'tBodyGyroJerk-arCoeff()-Z,1', 'tBodyAccMag-mean()', 'tBodyAccMag-mad()', 'tBodyAccMag-max()', 'tBodyAccMag-sma()', 'tBodyAccMag-iqr()', 'tGravityAccMag-mean()', 'tGravityAccMag-std()', 'tGravityAccMag-mad()', 'tGravityAccMag-max()', 'tGravityAccMag-min()', 'tGravityAccMag-sma()', 'tGravityAccMag-energy()', 'tGravityAccMag-iqr()', 'tGravityAccMag-entropy()', 'tGravityAccMag-arCoeff()1', 'tGravityAccMag-arCoeff()2', 'tGravityAccMag-arCoeff()3', 'tGravityAccMag-arCoeff()4', 'tBodyAccJerkMag-mean()', 'tBodyAccJerkMag-std()', 'tBodyAccJerkMag-mad()', 'tBodyAccJerkMag-max()', 'tBodyAccJerkMag-sma()', 'tBodyAccJerkMag-energy()', 'tBodyAccJerkMag-iqr()', 'tBodyAccJerkMag-entropy()', 'tBodyGyroMag-mean()', 'tBodyGyroMag-mad()', 'tBodyGyroMag-max()', 'tBodyGyroMag-sma()', 'tBodyGyroMag-iqr()', 'tBodyGyroJerkMag-mean()', 'tBodyGyroJerkMag-std()', 'tBodyGyroJerkMag-mad()', 'tBodyGyroJerkMag-max()', 'tBodyGyroJerkMag-sma()', 'tBodyGyroJerkMag-iqr()', 'tBodyGyroJerkMag-entropy()', 'fBodyAcc-mean()-X', 'fBodyAcc-mean()-Y', 'fBodyAcc-mean()-Z', 'fBodyAcc-std()-X', 'fBodyAcc-std()-Y', 'fBodyAcc-std()-Z', 'fBodyAcc-mad()-X', 'fBodyAcc-mad()-Y', 'fBodyAcc-mad()-Z', 'fBodyAcc-max()-X', 'fBodyAcc-max()-Y', 'fBodyAcc-max()-Z', 'fBodyAcc-sma()', 'fBodyAcc-energy()-X', 'fBodyAcc-energy()-Z', 'fBodyAcc-iqr()-Z', 'fBodyAcc-entropy()-X', 'fBodyAcc-entropy()-Y', 'fBodyAcc-entropy()-Z', 'fBodyAcc-kurtosis()-X', 'fBodyAcc-kurtosis()-Y', 'fBodyAcc-kurtosis()-Z', 'fBodyAcc-bandsEnergy()-1,8', 'fBodyAcc-bandsEnergy()-1,16', 'fBodyAcc-bandsEnergy()-17,32', 'fBodyAcc-bandsEnergy()-33,48', 'fBodyAcc-bandsEnergy()-49,64', 'fBodyAcc-bandsEnergy()-1,24', 'fBodyAcc-bandsEnergy()-25,48', 'fBodyAcc-bandsEnergy()-1,16.1', 'fBodyAcc-bandsEnergy()-17,32.1', 'fBodyAcc-bandsEnergy()-33,48.1', 'fBodyAcc-bandsEnergy()-49,64.1', 'fBodyAcc-bandsEnergy()-1,24.1', 'fBodyAcc-bandsEnergy()-25,48.1', 'fBodyAcc-bandsEnergy()-1,16.2', 'fBodyAcc-bandsEnergy()-17,32.2', 'fBodyAcc-bandsEnergy()-33,48.2', 'fBodyAcc-bandsEnergy()-49,64.2', 'fBodyAcc-bandsEnergy()-1,24.2', 'fBodyAcc-bandsEnergy()-25,48.2', 'fBodyAccJerk-mean()-X', 'fBodyAccJerk-mean()-Y', 'fBodyAccJerk-mean()-Z', 'fBodyAccJerk-std()-X', 'fBodyAccJerk-std()-Y', 'fBodyAccJerk-std()-Z', 'fBodyAccJerk-mad()-X', 'fBodyAccJerk-mad()-Y', 'fBodyAccJerk-mad()-Z', 'fBodyAccJerk-max()-X', 'fBodyAccJerk-max()-Y', 'fBodyAccJerk-max()-Z', 'fBodyAccJerk-sma()', 'fBodyAccJerk-energy()-X', 'fBodyAccJerk-energy()-Y', 'fBodyAccJerk-energy()-Z', 'fBodyAccJerk-iqr()-X', 'fBodyAccJerk-iqr()-Y', 'fBodyAccJerk-iqr()-Z', 'fBodyAccJerk-entropy()-X', 'fBodyAccJerk-entropy()-Y', 'fBodyAccJerk-entropy()-Z', 'fBodyAccJerk-bandsEnergy()-1,8', 'fBodyAccJerk-bandsEnergy()-9,16', 'fBodyAccJerk-bandsEnergy()-17,24', 'fBodyAccJerk-bandsEnergy()-25,32', 'fBodyAccJerk-bandsEnergy()-17,32', 'fBodyAccJerk-bandsEnergy()-49,64', 'fBodyAccJerk-bandsEnergy()-1,24', 'fBodyAccJerk-bandsEnergy()-9,16.1', 'fBodyAccJerk-bandsEnergy()-17,24.1', 'fBodyAccJerk-bandsEnergy()-25,32.1', 'fBodyAccJerk-bandsEnergy()-1,16.1', 'fBodyAccJerk-bandsEnergy()-17,32.1', 'fBodyAccJerk-bandsEnergy()-49,64.1', 'fBodyAccJerk-bandsEnergy()-1,24.1', 'fBodyAccJerk-bandsEnergy()-25,48.1', 'fBodyAccJerk-bandsEnergy()-9,16.2', 'fBodyAccJerk-bandsEnergy()-17,24.2', 'fBodyAccJerk-bandsEnergy()-25,32.2', 'fBodyAccJerk-bandsEnergy()-33,40.2', 'fBodyAccJerk-bandsEnergy()-1,16.2', 'fBodyAccJerk-bandsEnergy()-17,32.2', 'fBodyAccJerk-bandsEnergy()-33,48.2', 'fBodyAccJerk-bandsEnergy()-49,64.2', 'fBodyAccJerk-bandsEnergy()-1,24.2', 'fBodyAccJerk-bandsEnergy()-25,48.2', 'fBodyGyro-mean()-X', 'fBodyGyro-mean()-Y', 'fBodyGyro-mean()-Z', 'fBodyGyro-std()-X', 'fBodyGyro-std()-Y', 'fBodyGyro-std()-Z', 'fBodyGyro-mad()-X', 'fBodyGyro-mad()-Y', 'fBodyGyro-mad()-Z', 'fBodyGyro-max()-X', 'fBodyGyro-max()-Z', 'fBodyGyro-sma()', 'fBodyGyro-energy()-Y', 'fBodyGyro-energy()-Z', 'fBodyGyro-entropy()-X', 'fBodyGyro-entropy()-Y', 'fBodyGyro-entropy()-Z', 'fBodyGyro-kurtosis()-X', 'fBodyGyro-kurtosis()-Y', 'fBodyGyro-kurtosis()-Z', 'fBodyGyro-bandsEnergy()-1,8', 'fBodyGyro-bandsEnergy()-1,16', 'fBodyGyro-bandsEnergy()-17,32', 'fBodyGyro-bandsEnergy()-33,48', 'fBodyGyro-bandsEnergy()-49,64', 'fBodyGyro-bandsEnergy()-1,24', 'fBodyGyro-bandsEnergy()-25,48', 'fBodyGyro-bandsEnergy()-17,32.1', 'fBodyGyro-bandsEnergy()-33,48.1', 'fBodyGyro-bandsEnergy()-49,64.1', 'fBodyGyro-bandsEnergy()-1,24.1', 'fBodyGyro-bandsEnergy()-25,48.1', 'fBodyGyro-bandsEnergy()-1,16.2', 'fBodyGyro-bandsEnergy()-17,32.2', 'fBodyGyro-bandsEnergy()-33,48.2', 'fBodyGyro-bandsEnergy()-49,64.2', 'fBodyGyro-bandsEnergy()-1,24.2', 'fBodyGyro-bandsEnergy()-25,48.2', 'fBodyAccMag-mean()', 'fBodyAccMag-std()', 'fBodyAccMag-mad()', 'fBodyAccMag-max()', 'fBodyAccMag-sma()', 'fBodyAccMag-iqr()', 'fBodyAccMag-entropy()', 'fBodyAccMag-kurtosis()', 'fBodyBodyAccJerkMag-mean()', 'fBodyBodyAccJerkMag-std()', 'fBodyBodyAccJerkMag-mad()', 'fBodyBodyAccJerkMag-max()', 'fBodyBodyAccJerkMag-sma()', 'fBodyBodyAccJerkMag-energy()', 'fBodyBodyAccJerkMag-iqr()', 'fBodyBodyAccJerkMag-entropy()', 'fBodyBodyAccJerkMag-kurtosis()', 'fBodyBodyGyroMag-mean()', 'fBodyBodyGyroMag-std()', 'fBodyBodyGyroMag-mad()', 'fBodyBodyGyroMag-max()', 'fBodyBodyGyroMag-sma()', 'fBodyBodyGyroMag-entropy()', 'fBodyBodyGyroMag-kurtosis()', 'fBodyBodyGyroJerkMag-mean()', 'fBodyBodyGyroJerkMag-std()', 'fBodyBodyGyroJerkMag-mad()', 'fBodyBodyGyroJerkMag-max()', 'fBodyBodyGyroJerkMag-sma()', 'fBodyBodyGyroJerkMag-energy()', 'fBodyBodyGyroJerkMag-iqr()', 'fBodyBodyGyroJerkMag-entropy()', 'angle(X,gravityMean)', 'angle(Y,gravityMean)', 'angle(Z,gravityMean)']
Dropping 9 features with VIF > 10: ['angle(tBodyGyroJerkMean,gravityMean)', 'angle(tBodyGyroMean,gravityMean)', 'angle(tBodyAccJerkMean),gravityMean)', 'angle(tBodyAccMean,gravity)', 'fBodyBodyGyroJerkMag-kurtosis()', 'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-meanFreq()', 'fBodyBodyGyroJerkMag-maxInds', 'fBodyBodyGyroJerkMag-min()']
No description has been provided for this image
In [461]:
X_train_input = X_train
y_train_input = y_train
X_test_input = X_test
y_test_input = y_test
scaler_input = scaler
pca_input=pca
encoder_input=encoder
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

print("X_train_input shape:", X_train_input.shape)
print("y_train_input shape:", y_train_input.shape)
print("X_test_input shape:", X_test_input.shape)
print("y_test_input shape:", y_test_input.shape)
X_train shape: (8442, 131)
y_train shape: (8442,)
X_test shape: (2947, 131)
y_test shape: (2947,)
X_train_input shape: (8442, 131)
y_train_input shape: (8442,)
X_test_input shape: (2947, 131)
y_test_input shape: (2947,)
In [462]:
# Traing and evaluate RandomForest Model
In [463]:
y_train_input
Out[463]:
array([2, 2, 2, ..., 5, 5, 5])
In [470]:
model_random_forest, y_pred_random_forest = randomforest_model_training_and_evaluation("RandomForest", X_train_input, y_train_input, X_test_input, y_test_input)
--- Training and Evaluating RandomForest Model ---
Best hyperparameters: {'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 200}
RandomForest model initialized with tuned hyperparameters.
Cross-validation accuracy scores: [0.95677916 0.97276495 0.9549763  0.95734597 0.96445498]
Mean cross-validation accuracy: 0.9613
RandomForest model trained on the full training set.
Model trained.
Predictions made on the test set.
Test Accuracy: 86.43%
Test F1-score: 0.8644
Test Precision: 0.8666
Test Recall: 0.8643

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       537
           1       0.80      0.79      0.79       491
           2       0.82      0.83      0.83       532
           3       0.87      0.91      0.89       496
           4       0.77      0.84      0.81       420
           5       0.93      0.81      0.87       471

    accuracy                           0.86      2947
   macro avg       0.86      0.86      0.86      2947
weighted avg       0.87      0.86      0.86      2947

No description has been provided for this image
Model evaluated.
In [471]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# The model outputs probabilities, converting into class labels
if y_pred_random_forest.ndim > 1:
    y_pred_random_forest = y_pred_random_forest.argmax(axis=1)

# Mapping predicted labels to target names
predicted_target_names = [target_names[i] for i in y_pred]

# Mapping original labels to target names
original_target_names = [target_names[i] for i in y_test]

# Printing top 2 predictions at an offset of 100, repeating 10 times
offset = 100
num_samples = 2
num_offsets = 10

print(f"Top {num_samples} Predictions at Offset of {offset} (Repeated {num_offsets} Times):")
for i in range(num_offsets):
    start_idx = i * offset
    end_idx = start_idx + num_samples
    print(f"\nOffset {start_idx} to {end_idx - 1}:")
    for j in range(start_idx, end_idx):
        print(f"Sample {j}: Predicted = {predicted_target_names[j]}, Original = {original_target_names[j]}")
X_train shape: (8442, 131)
y_train shape: (8442,)
Top 2 Predictions at Offset of 100 (Repeated 10 Times):

Offset 0 to 1:
Sample 0: Predicted = LAYING, Original = LAYING
Sample 1: Predicted = LAYING, Original = LAYING

Offset 100 to 101:
Sample 100: Predicted = WALKING, Original = WALKING
Sample 101: Predicted = WALKING, Original = WALKING

Offset 200 to 201:
Sample 200: Predicted = SITTING, Original = SITTING
Sample 201: Predicted = SITTING, Original = SITTING

Offset 300 to 301:
Sample 300: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 301: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS

Offset 400 to 401:
Sample 400: Predicted = WALKING, Original = WALKING
Sample 401: Predicted = WALKING, Original = WALKING

Offset 500 to 501:
Sample 500: Predicted = SITTING, Original = SITTING
Sample 501: Predicted = SITTING, Original = SITTING

Offset 600 to 601:
Sample 600: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 601: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_UPSTAIRS

Offset 700 to 701:
Sample 700: Predicted = WALKING, Original = WALKING
Sample 701: Predicted = WALKING, Original = WALKING

Offset 800 to 801:
Sample 800: Predicted = LAYING, Original = SITTING
Sample 801: Predicted = LAYING, Original = SITTING

Offset 900 to 901:
Sample 900: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
Sample 901: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
In [472]:
# Traing and evaluate RandomForest Model
In [473]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [474]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [475]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [476]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [487]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score
import logging

def svm_tune_hyperparameters(X_train, y_train):
    """Perform hyperparameter tuning using GridSearchCV."""
    param_grid = {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf']
    }
    grid_search = GridSearchCV(
        SVC(random_state=42),
        param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1
    )
    grid_search.fit(X_train, y_train)
    print(f"Best hyperparameters: {grid_search.best_params_}")
    return grid_search.best_estimator_

def cross_validate(model, X_train, y_train):
    """Perform cross-validation on the training data."""
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return cv_scores

def svm_model_training_and_evaluation(X_train, y_train, X_test, y_test, algorithm_name="SVM"):
    """Train, evaluate, and save an SVM model."""
    print(f"Training {algorithm_name} model...")

    # Hyperparameter tuning
    print("Starting hyperparameter tuning...")
    best_model = svm_tune_hyperparameters(X_train, y_train)
    print("Hyperparameter tuning completed.")

    # Initialize the model
    model_svm = SVC(
        C=best_model.C,
        kernel=best_model.kernel,
        random_state=42,
        probability=True  # Enable probability estimates
    )
    print(f"{algorithm_name} model initialized with tuned hyperparameters.")

    # Perform cross-validation
    logging.info("Performing cross-validation...")
    cv_scores = cross_validate(model_svm, X_train, y_train)
    print(f"Cross-validation accuracy scores: {cv_scores}")
    print(f"Mean cross-validation accuracy: {cv_scores.mean():.4f}")

    # Train the model
    model_svm.fit(X_train, y_train)
    print("Model trained.")

    #  predictions on the test set
    y_pred_svm = model_svm.predict(X_test)
    print("Predictions made on the test set.")

    # Evaluate the model
    evaluate_model(algorithm_name, y_test, y_pred_svm)
    print("Model evaluated.")

    return model_svm, y_pred_svm
In [488]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
X_train shape: (8442, 131)
y_train shape: (8442,)
In [489]:
# Train and evaluate the SVM model
In [490]:
model_svm, y_pred_svm = svm_model_training_and_evaluation(X_train, y_train, X_test, y_test)
Training SVM model...
Starting hyperparameter tuning...
Best hyperparameters: {'C': 10, 'kernel': 'rbf'}
Hyperparameter tuning completed.
SVM model initialized with tuned hyperparameters.
Cross-validation accuracy scores: [0.98046181 0.97039668 0.98163507 0.98459716 0.96327014]
Mean cross-validation accuracy: 0.9761
Model trained.
Predictions made on the test set.
Test Accuracy: 94.74%
Test F1-score: 0.9472
Test Precision: 0.9482
Test Recall: 0.9474

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.94      0.87      0.90       491
           2       0.89      0.95      0.92       532
           3       0.94      0.98      0.96       496
           4       0.97      0.93      0.95       420
           5       0.96      0.95      0.95       471

    accuracy                           0.95      2947
   macro avg       0.95      0.95      0.95      2947
weighted avg       0.95      0.95      0.95      2947

No description has been provided for this image
Model evaluated.
In [491]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# The model outputs probabilities, converting into class labels
if y_pred_svm.ndim > 1:
    y_pred_svm = y_pred_svm.argmax(axis=1)

# Mapping predicted labels to target names
predicted_target_names = [target_names[i] for i in y_pred]

# Mapping original labels to target names
original_target_names = [target_names[i] for i in y_test]

# Printing top 2 predictions at an offset of 100, repeating 10 times
offset = 100
num_samples = 2
num_offsets = 10

print(f"Top {num_samples} Predictions at Offset of {offset} (Repeated {num_offsets} Times):")
for i in range(num_offsets):
    start_idx = i * offset
    end_idx = start_idx + num_samples
    print(f"\nOffset {start_idx} to {end_idx - 1}:")
    for j in range(start_idx, end_idx):
        print(f"Sample {j}: Predicted = {predicted_target_names[j]}, Original = {original_target_names[j]}")
X_train shape: (8442, 131)
y_train shape: (8442,)
Top 2 Predictions at Offset of 100 (Repeated 10 Times):

Offset 0 to 1:
Sample 0: Predicted = LAYING, Original = LAYING
Sample 1: Predicted = LAYING, Original = LAYING

Offset 100 to 101:
Sample 100: Predicted = WALKING, Original = WALKING
Sample 101: Predicted = WALKING, Original = WALKING

Offset 200 to 201:
Sample 200: Predicted = SITTING, Original = SITTING
Sample 201: Predicted = SITTING, Original = SITTING

Offset 300 to 301:
Sample 300: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 301: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS

Offset 400 to 401:
Sample 400: Predicted = WALKING, Original = WALKING
Sample 401: Predicted = WALKING, Original = WALKING

Offset 500 to 501:
Sample 500: Predicted = SITTING, Original = SITTING
Sample 501: Predicted = SITTING, Original = SITTING

Offset 600 to 601:
Sample 600: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 601: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_UPSTAIRS

Offset 700 to 701:
Sample 700: Predicted = WALKING, Original = WALKING
Sample 701: Predicted = WALKING, Original = WALKING

Offset 800 to 801:
Sample 800: Predicted = LAYING, Original = SITTING
Sample 801: Predicted = LAYING, Original = SITTING

Offset 900 to 901:
Sample 900: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
Sample 901: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
In [492]:
# LR Schedule
In [493]:
def lr_schedule(epoch):
    #Learning rate schedule for the CNN model.
    initial_lr = 0.001
    drop = 0.5
    epochs_drop = 10
    lr = initial_lr * (drop ** (epoch // epochs_drop))
    return lr
In [494]:
# CNN Evalute Model
In [495]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [496]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [497]:
# -----------------------------------------------------------------------------------------------------------------------------------
In [503]:
unique_classes, class_counts = np.unique(y_train, return_counts=True)
print("Class distribution in y_train:", dict(zip(unique_classes, class_counts)))
Class distribution in y_train: {np.int64(0): np.int64(1407), np.int64(1): np.int64(1407), np.int64(2): np.int64(1407), np.int64(3): np.int64(1407), np.int64(4): np.int64(1407), np.int64(5): np.int64(1407)}
In [504]:
subject_encoder
Out[504]:
OneHotEncoder(handle_unknown='ignore', sparse_output=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
OneHotEncoder(handle_unknown='ignore', sparse_output=False)
In [505]:
# Reshape X_train and X_test to 3D (samples, timesteps, features)
X_train_reshaped = np.expand_dims(X_train, axis=-1)  # Shape: (samples, timesteps, 1)
X_test_reshaped = np.expand_dims(X_test, axis=-1)    # Shape: (samples, timesteps, 1)
# X_train_final, y_train, X_test_final, y_test

# Convert features to float32
X_train_reshaped = X_train_reshaped.astype('float32')
X_test_reshaped = X_test_reshaped.astype('float32')

# Verify the conversion
print("X_train_reshaped shape:", X_train_reshaped.shape)
print("X_test_reshaped shape:", X_test_reshaped.shape)
print("NaN in X_train:", np.isnan(X_train_reshaped).sum())
print("NaN in X_test:", np.isnan(X_test_reshaped).sum())

# Define input shape and number of classes
input_shape = (X_train_reshaped.shape[1], X_train_reshaped.shape[2])  # (timesteps, features)
num_classes = len(np.unique(y_train))  # Number of unique classes in y_train
print("Input shape:", input_shape)
print("Number of classes:", num_classes)
X_train_reshaped shape: (8442, 131, 1)
X_test_reshaped shape: (2947, 131, 1)
NaN in X_train: 0
NaN in X_test: 0
Input shape: (131, 1)
Number of classes: 6
In [506]:
# Build an improved CNN-LSTM model
def build_improved_hybrid_model(input_shape, num_classes):
    """Build an improved CNN-LSTM model."""
    model = Sequential([
        Conv1D(64, 3, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Dropout(0.3),
        
        Conv1D(128, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Dropout(0.3),
        
        LSTM(64, return_sequences=True),
        Dropout(0.3),
        
        LSTM(32),
        Dropout(0.3),
        
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model
In [507]:
# Build the model
model_hybrid_lstm = build_improved_hybrid_model(input_shape, num_classes)
model_hybrid_lstm.summary()

# Train the model with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model_hybrid_lstm.fit(
    X_train_reshaped, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping]
)

# Evaluate the model
test_loss, test_accuracy = model_hybrid_lstm.evaluate(X_test_reshaped, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions
y_pred_hybrid_lstm = model_hybrid_lstm.predict(X_test_reshaped).argmax(axis=1)

# Classification report and confusion matrix
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
/Users/dheerajkumar/miniconda3/lib/python3.12/site-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Model: "sequential_13"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv1d_30 (Conv1D)              │ (None, 129, 64)        │           256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_41          │ (None, 129, 64)        │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling1d_30 (MaxPooling1D) │ (None, 64, 64)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_55 (Dropout)            │ (None, 64, 64)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv1d_31 (Conv1D)              │ (None, 62, 128)        │        24,704 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_42          │ (None, 62, 128)        │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling1d_31 (MaxPooling1D) │ (None, 31, 128)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_56 (Dropout)            │ (None, 31, 128)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_16 (LSTM)                  │ (None, 31, 64)         │        49,408 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_57 (Dropout)            │ (None, 31, 64)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_17 (LSTM)                  │ (None, 32)             │        12,416 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_58 (Dropout)            │ (None, 32)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_26 (Dense)                │ (None, 64)             │         2,112 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_43          │ (None, 64)             │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_59 (Dropout)            │ (None, 64)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_27 (Dense)                │ (None, 6)              │           390 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 90,310 (352.77 KB)
 Trainable params: 89,798 (350.77 KB)
 Non-trainable params: 512 (2.00 KB)
Epoch 1/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step - accuracy: 0.1752 - loss: 2.1726 - val_accuracy: 0.0746 - val_loss: 2.3101
Epoch 2/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - accuracy: 0.2341 - loss: 1.8219 - val_accuracy: 0.0746 - val_loss: 3.2077
Epoch 3/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - accuracy: 0.2567 - loss: 1.7019 - val_accuracy: 0.1883 - val_loss: 1.7953
Epoch 4/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 20ms/step - accuracy: 0.3608 - loss: 1.3158 - val_accuracy: 0.3961 - val_loss: 1.0940
Epoch 5/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 17ms/step - accuracy: 0.4445 - loss: 1.1055 - val_accuracy: 0.4506 - val_loss: 0.9485
Epoch 6/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 17ms/step - accuracy: 0.5679 - loss: 0.8667 - val_accuracy: 0.6963 - val_loss: 0.7277
Epoch 7/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.6564 - loss: 0.7640 - val_accuracy: 0.7626 - val_loss: 0.6561
Epoch 8/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.7074 - loss: 0.6910 - val_accuracy: 0.7229 - val_loss: 0.6885
Epoch 9/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.7529 - loss: 0.6427 - val_accuracy: 0.7425 - val_loss: 0.7042
Epoch 10/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.7662 - loss: 0.5842 - val_accuracy: 0.7975 - val_loss: 0.5422
Epoch 11/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.7815 - loss: 0.5734 - val_accuracy: 0.7774 - val_loss: 0.5349
Epoch 12/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8093 - loss: 0.5148 - val_accuracy: 0.8561 - val_loss: 0.4111
Epoch 13/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8103 - loss: 0.5131 - val_accuracy: 0.8271 - val_loss: 0.4774
Epoch 14/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 16ms/step - accuracy: 0.8155 - loss: 0.4803 - val_accuracy: 0.8425 - val_loss: 0.4370
Epoch 15/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 19ms/step - accuracy: 0.8394 - loss: 0.4177 - val_accuracy: 0.8212 - val_loss: 0.4356
Epoch 16/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8319 - loss: 0.4134 - val_accuracy: 0.8857 - val_loss: 0.3215
Epoch 17/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8553 - loss: 0.3734 - val_accuracy: 0.8899 - val_loss: 0.3010
Epoch 18/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8517 - loss: 0.3768 - val_accuracy: 0.8514 - val_loss: 0.4520
Epoch 19/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8633 - loss: 0.3482 - val_accuracy: 0.8757 - val_loss: 0.3368
Epoch 20/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8605 - loss: 0.3646 - val_accuracy: 0.9218 - val_loss: 0.2048
Epoch 21/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8838 - loss: 0.3079 - val_accuracy: 0.8940 - val_loss: 0.2708
Epoch 22/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 17ms/step - accuracy: 0.8883 - loss: 0.3056 - val_accuracy: 0.9100 - val_loss: 0.2230
Epoch 23/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8822 - loss: 0.3237 - val_accuracy: 0.8976 - val_loss: 0.2478
Epoch 24/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.8944 - loss: 0.2879 - val_accuracy: 0.9041 - val_loss: 0.2609
Epoch 25/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - accuracy: 0.8996 - loss: 0.2646 - val_accuracy: 0.9177 - val_loss: 0.2178
93/93 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.8464 - loss: 0.4384
Test Accuracy: 84.66%
93/93 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       537
           1       0.80      0.79      0.79       491
           2       0.82      0.83      0.83       532
           3       0.87      0.91      0.89       496
           4       0.77      0.84      0.81       420
           5       0.93      0.81      0.87       471

    accuracy                           0.86      2947
   macro avg       0.86      0.86      0.86      2947
weighted avg       0.87      0.86      0.86      2947

Confusion Matrix:
[[529   8   0   0   0   0]
 [ 11 386  91   0   0   3]
 [  0  88 444   0   0   0]
 [  0   0   0 452  42   2]
 [  0   0   0  45 353  22]
 [  0   0   4  23  61 383]]
In [508]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# The model outputs probabilities, converting into class labels
if y_pred_hybrid_lstm.ndim > 1:
    y_pred_hybrid_lstm = y_pred_hybrid_lstm.argmax(axis=1)

# Mapping predicted labels to target names
predicted_target_names = [target_names[i] for i in y_pred]

# Mapping original labels to target names
original_target_names = [target_names[i] for i in y_test]

# Printing top 2 predictions at an offset of 100, repeating 10 times
offset = 100
num_samples = 2
num_offsets = 10

print(f"Top {num_samples} Predictions at Offset of {offset} (Repeated {num_offsets} Times):")
for i in range(num_offsets):
    start_idx = i * offset
    end_idx = start_idx + num_samples
    print(f"\nOffset {start_idx} to {end_idx - 1}:")
    for j in range(start_idx, end_idx):
        print(f"Sample {j}: Predicted = {predicted_target_names[j]}, Original = {original_target_names[j]}")
X_train shape: (8442, 131)
y_train shape: (8442,)
Top 2 Predictions at Offset of 100 (Repeated 10 Times):

Offset 0 to 1:
Sample 0: Predicted = LAYING, Original = LAYING
Sample 1: Predicted = LAYING, Original = LAYING

Offset 100 to 101:
Sample 100: Predicted = WALKING, Original = WALKING
Sample 101: Predicted = WALKING, Original = WALKING

Offset 200 to 201:
Sample 200: Predicted = SITTING, Original = SITTING
Sample 201: Predicted = SITTING, Original = SITTING

Offset 300 to 301:
Sample 300: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 301: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS

Offset 400 to 401:
Sample 400: Predicted = WALKING, Original = WALKING
Sample 401: Predicted = WALKING, Original = WALKING

Offset 500 to 501:
Sample 500: Predicted = SITTING, Original = SITTING
Sample 501: Predicted = SITTING, Original = SITTING

Offset 600 to 601:
Sample 600: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 601: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_UPSTAIRS

Offset 700 to 701:
Sample 700: Predicted = WALKING, Original = WALKING
Sample 701: Predicted = WALKING, Original = WALKING

Offset 800 to 801:
Sample 800: Predicted = LAYING, Original = SITTING
Sample 801: Predicted = LAYING, Original = SITTING

Offset 900 to 901:
Sample 900: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
Sample 901: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
In [510]:
# Build CNN Model
In [512]:
def cnn_model_training_and_evaluation(X_train, y_train, X_test, y_test):
    """
    Train and evaluate a CNN model.

    Parameters:
        X_train (np.array): Training data features.
        y_train (np.array): Training data labels.
        X_test (np.array): Test data features.
        y_test (np.array): Test data labels.

    Returns:
        model: Trained CNN model.
    """
    # Reshape data for CNN input
    X_train_reshaped = np.expand_dims(X_train, axis=-1)  # Shape: (samples, timesteps, 1)
    X_test_reshaped = np.expand_dims(X_test, axis=-1)    # Shape: (samples, timesteps, 1)

    # Define input shape and number of classes
    input_shape = (X_train_reshaped.shape[1], X_train_reshaped.shape[2])  # (timesteps, features)
    num_classes = len(np.unique(y_train))  # Number of unique classes in y_train
    print("Input shape:", input_shape)
    print("Number of classes:", num_classes)

    # Build the CNN model
    def build_cnn_model(input_shape, num_classes):
        """Build a CNN model."""
        model = Sequential([
            Conv1D(64, 3, activation='relu', input_shape=input_shape),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.3),
            
            Conv1D(128, 3, activation='relu'),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.3),
            
            Conv1D(256, 3, activation='relu'),
            BatchNormalization(),
            MaxPooling1D(2),
            Dropout(0.3),
            
            Flatten(),
            
            Dense(128, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            
            Dense(num_classes, activation='softmax')
        ])
        
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model

    # Build the model
    model = build_cnn_model(input_shape, num_classes)
    model.summary()

    # Train the model with early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    history = model.fit(
        X_train_reshaped, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test, verbose=0)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

    # Make predictions
    y_pred_cnn = model.predict(X_test_reshaped).argmax(axis=1)

    # Classification report and confusion matrix
    print("Classification Report:")
    print(classification_report(y_test, y_pred_cnn))

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred_cnn))

    return model
In [513]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# Train and evaluate the SVM model
cnn_model = cnn_model_training_and_evaluation(X_train, y_train, X_test, y_test)
X_train shape: (8442, 131)
y_train shape: (8442,)
Input shape: (131, 1)
Number of classes: 6
/Users/dheerajkumar/miniconda3/lib/python3.12/site-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Model: "sequential_14"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv1d_32 (Conv1D)              │ (None, 129, 64)        │           256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_44          │ (None, 129, 64)        │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling1d_32 (MaxPooling1D) │ (None, 64, 64)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_60 (Dropout)            │ (None, 64, 64)         │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv1d_33 (Conv1D)              │ (None, 62, 128)        │        24,704 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_45          │ (None, 62, 128)        │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling1d_33 (MaxPooling1D) │ (None, 31, 128)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_61 (Dropout)            │ (None, 31, 128)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv1d_34 (Conv1D)              │ (None, 29, 256)        │        98,560 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_46          │ (None, 29, 256)        │         1,024 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling1d_34 (MaxPooling1D) │ (None, 14, 256)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_62 (Dropout)            │ (None, 14, 256)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_5 (Flatten)             │ (None, 3584)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_28 (Dense)                │ (None, 128)            │       458,880 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_47          │ (None, 128)            │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_63 (Dropout)            │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_29 (Dense)                │ (None, 6)              │           774 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 585,478 (2.23 MB)
 Trainable params: 584,326 (2.23 MB)
 Non-trainable params: 1,152 (4.50 KB)
Epoch 1/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - accuracy: 0.4774 - loss: 1.4982 - val_accuracy: 0.1800 - val_loss: 3.6424
Epoch 2/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.8092 - loss: 0.4792 - val_accuracy: 0.3665 - val_loss: 2.5803
Epoch 3/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.8821 - loss: 0.3150 - val_accuracy: 0.9165 - val_loss: 0.2376
Epoch 4/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9142 - loss: 0.2301 - val_accuracy: 0.9663 - val_loss: 0.1087
Epoch 5/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9287 - loss: 0.1860 - val_accuracy: 0.9728 - val_loss: 0.0748
Epoch 6/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9470 - loss: 0.1569 - val_accuracy: 0.9805 - val_loss: 0.0602
Epoch 7/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9463 - loss: 0.1425 - val_accuracy: 0.9834 - val_loss: 0.0503
Epoch 8/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 3s 14ms/step - accuracy: 0.9558 - loss: 0.1167 - val_accuracy: 0.9763 - val_loss: 0.0649
Epoch 9/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9595 - loss: 0.1094 - val_accuracy: 0.9668 - val_loss: 0.0890
Epoch 10/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9597 - loss: 0.1136 - val_accuracy: 0.9763 - val_loss: 0.0552
Epoch 11/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9640 - loss: 0.1012 - val_accuracy: 0.9899 - val_loss: 0.0309
Epoch 12/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9731 - loss: 0.0792 - val_accuracy: 0.9834 - val_loss: 0.0422
Epoch 13/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9739 - loss: 0.0743 - val_accuracy: 0.9840 - val_loss: 0.0436
Epoch 14/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9795 - loss: 0.0576 - val_accuracy: 0.9816 - val_loss: 0.0475
Epoch 15/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.9718 - loss: 0.0809 - val_accuracy: 0.9864 - val_loss: 0.0438
Epoch 16/50
212/212 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - accuracy: 0.9786 - loss: 0.0614 - val_accuracy: 0.9805 - val_loss: 0.0686
Test Accuracy: 91.48%
93/93 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.90      0.85      0.87       491
           2       0.87      0.92      0.89       532
           3       0.92      0.95      0.93       496
           4       0.84      0.93      0.88       420
           5       0.96      0.85      0.90       471

    accuracy                           0.91      2947
   macro avg       0.91      0.91      0.91      2947
weighted avg       0.92      0.91      0.91      2947

Confusion Matrix:
[[536   0   1   0   0   0]
 [  2 415  70   0   1   3]
 [  0  44 488   0   0   0]
 [  0   0   0 469  25   2]
 [  0   0   0  18 389  13]
 [  0   0   0  22  50 399]]
In [514]:
# Ensure X_train and y_train have the correct shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# The model outputs probabilities, converting into class labels
if y_pred_cnn.ndim > 1:
    y_pred_cnn = y_pred_cnn.argmax(axis=1)

# Mapping predicted labels to target names
predicted_target_names = [target_names[i] for i in y_pred]

# Mapping original labels to target names
original_target_names = [target_names[i] for i in y_test]

# Printing top 2 predictions at an offset of 100, repeating 10 times
offset = 100
num_samples = 2
num_offsets = 10

print(f"Top {num_samples} Predictions at Offset of {offset} (Repeated {num_offsets} Times):")
for i in range(num_offsets):
    start_idx = i * offset
    end_idx = start_idx + num_samples
    print(f"\nOffset {start_idx} to {end_idx - 1}:")
    for j in range(start_idx, end_idx):
        print(f"Sample {j}: Predicted = {predicted_target_names[j]}, Original = {original_target_names[j]}")
X_train shape: (8442, 131)
y_train shape: (8442,)
Top 2 Predictions at Offset of 100 (Repeated 10 Times):

Offset 0 to 1:
Sample 0: Predicted = LAYING, Original = LAYING
Sample 1: Predicted = LAYING, Original = LAYING

Offset 100 to 101:
Sample 100: Predicted = WALKING, Original = WALKING
Sample 101: Predicted = WALKING, Original = WALKING

Offset 200 to 201:
Sample 200: Predicted = SITTING, Original = SITTING
Sample 201: Predicted = SITTING, Original = SITTING

Offset 300 to 301:
Sample 300: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 301: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS

Offset 400 to 401:
Sample 400: Predicted = WALKING, Original = WALKING
Sample 401: Predicted = WALKING, Original = WALKING

Offset 500 to 501:
Sample 500: Predicted = SITTING, Original = SITTING
Sample 501: Predicted = SITTING, Original = SITTING

Offset 600 to 601:
Sample 600: Predicted = WALKING_UPSTAIRS, Original = WALKING_UPSTAIRS
Sample 601: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_UPSTAIRS

Offset 700 to 701:
Sample 700: Predicted = WALKING, Original = WALKING
Sample 701: Predicted = WALKING, Original = WALKING

Offset 800 to 801:
Sample 800: Predicted = LAYING, Original = SITTING
Sample 801: Predicted = LAYING, Original = SITTING

Offset 900 to 901:
Sample 900: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
Sample 901: Predicted = WALKING_DOWNSTAIRS, Original = WALKING_DOWNSTAIRS
In [ ]: